In [3]:
import numpy as np
import csv
import datetime
import pandas as pd
import matplotlib.pyplot as plt
import calendar
import matplotlib.pyplot as plt1; plt.rcdefaults()
import collections
from scipy.stats import chisquare
import folium as folium
import json
import requests
import os
In [4]:
YellowCapsJanuary = pd.read_csv('Data/01jan.csv')
YellowCapsFeburary = pd.read_csv('Data/02feb.csv')
YellowCapsMarch = pd.read_csv('Data/03mar.csv')
YellowCapsApril = pd.read_csv('Data/04apr.csv')
YellowCapsMay = pd.read_csv('Data/05may.csv')
YellowCapsJune = pd.read_csv('Data/06jun.csv')
TaxiZoneLookUp = pd.read_csv('Data/taxi_zone_lookup.csv')
NYC_Zones_geo = pd.read_json('Data/taxi_zones.json')
In [5]:
#RQ1
YellowCapsJanuary = YellowCapsJanuary.rename(columns = {'DOLocationID' : 'LocationID'})
RQ01 = pd.merge(YellowCapsJanuary, TaxiZoneLookUp, on="LocationID")
RQ01 = RQ01.Borough
McountJ = RQ01.value_counts()['Manhattan']
QcountJ = RQ01.value_counts()['Queens']
BcountJ = RQ01.value_counts()['Brooklyn']
UcountJ = RQ01.value_counts()['Unknown']
B1countJ = RQ01.value_counts()['Bronx']
EcountJ = RQ01.value_counts()['EWR']
SIcountJ = RQ01.value_counts()['Staten Island']
BoroughNames01 = ['Manhattan', 'Queens', 'Brooklyn', 'Unknown', 'Bronx', 'EWR', 'Staten Island']
y_pos1 = np.arange(len(BoroughNames01))
BoroughTrips01 = [McountJ,QcountJ,BcountJ,UcountJ,B1countJ,EcountJ,SIcountJ]
plt.bar(y_pos1, BoroughTrips01, align='center', alpha=0.5)
plt.xticks(y_pos1, BoroughNames01)
plt.ylabel('Average Trips')
plt.title('NYC Boroughs January Average Trips')
plt.show()
In [6]:
YellowCapsFeburary = YellowCapsFeburary.rename(columns = {'DOLocationID' : 'LocationID'})
RQ02 = pd.merge(YellowCapsFeburary, TaxiZoneLookUp, on="LocationID")
RQ02 = RQ02.Borough
McountF = RQ02.value_counts()['Manhattan']
QcountF = RQ02.value_counts()['Queens']
BcountF = RQ02.value_counts()['Brooklyn']
UcountF = RQ02.value_counts()['Unknown']
B1countF = RQ02.value_counts()['Bronx']
EcountF = RQ02.value_counts()['EWR']
SIcountF = RQ02.value_counts()['Staten Island']
BoroughNames02 = ['Manhattan','Queens','Brooklyn','Unknown','Bronx','EWR','Staten Island']
y_pos2 = np.arange(len(BoroughNames02))
BoroughTrips02 = [McountF,QcountF,BcountF,UcountF,B1countF,EcountF,SIcountF]
plt.bar(y_pos2, BoroughTrips02, align='center', alpha=0.5)
plt.xticks(y_pos2, BoroughNames02)
plt.ylabel('Average Trips')
plt.title('NYC Boroughs Feburary Average Trips')
plt.show()
In [7]:
YellowCapsMarch = YellowCapsMarch.rename(columns = {'DOLocationID' : 'LocationID'})
RQ03 = pd.merge(YellowCapsMarch, TaxiZoneLookUp, on="LocationID")
RQ03 = RQ03.Borough
McountM = RQ03.value_counts()['Manhattan']
QcountM = RQ03.value_counts()['Queens']
BcountM = RQ03.value_counts()['Brooklyn']
UcountM = RQ03.value_counts()['Unknown']
B1countM = RQ03.value_counts()['Bronx']
EcountM = RQ03.value_counts()['EWR']
SIcountM = RQ03.value_counts()['Staten Island']
BoroughNames03 = ['Manhattan', 'Queens', 'Brooklyn', 'Unknown', 'Bronx', 'EWR', 'Staten Island']
y_pos3 = np.arange(len(BoroughNames03))
BoroughTrips03 = [McountM,QcountM,BcountM,UcountM,B1countM,EcountM,SIcountM]
plt.bar(y_pos3, BoroughTrips03, align='center', alpha=0.5)
plt.xticks(y_pos3, BoroughNames03)
plt.ylabel('Average Trips')
plt.title('NYC Boroughs March Average Trips')
plt.show()
In [8]:
YellowCapsApril = YellowCapsApril.rename(columns = {'DOLocationID' : 'LocationID'})
RQ04 = pd.merge(YellowCapsApril, TaxiZoneLookUp, on="LocationID")
RQ04 = RQ04.Borough
McountA = RQ04.value_counts()['Manhattan']
QcountA = RQ04.value_counts()['Queens']
BcountA = RQ04.value_counts()['Brooklyn']
UcountA = RQ04.value_counts()['Unknown']
B1countA = RQ04.value_counts()['Bronx']
EcountA = RQ04.value_counts()['EWR']
SIcountA = RQ04.value_counts()['Staten Island']
BoroughNames04 = ['Manhattan', 'Queens', 'Brooklyn', 'Unknown', 'Bronx', 'EWR', 'Staten Island']
y_pos4 = np.arange(len(BoroughNames04))
BoroughTrips04 = [McountA,QcountA,BcountA,UcountA,B1countA,EcountA,SIcountA]
plt.bar(y_pos4, BoroughTrips04, align='center', alpha=0.5)
plt.xticks(y_pos4, BoroughNames04)
plt.ylabel('Average Trips')
plt.title('NYC Boroughs April Average Trips')
plt.show()
In [9]:
YellowCapsMay = YellowCapsMay.rename(columns = {'DOLocationID' : 'LocationID'})
RQ05 = pd.merge(YellowCapsMay, TaxiZoneLookUp, on="LocationID")
RQ05 = RQ05.Borough
McountMa = RQ05.value_counts()['Manhattan']
QcountMa = RQ05.value_counts()['Queens']
BcountMa = RQ05.value_counts()['Brooklyn']
UcountMa = RQ05.value_counts()['Unknown']
B1countMa = RQ05.value_counts()['Bronx']
EcountMa = RQ05.value_counts()['EWR']
SIcountMa = RQ05.value_counts()['Staten Island']
BoroughNames05 = ['Manhattan', 'Queens', 'Brooklyn', 'Unknown', 'Bronx', 'EWR', 'Staten Island']
y_pos5 = np.arange(len(BoroughNames05))
BoroughTrips05 = [McountMa,QcountMa,BcountMa,UcountMa,B1countMa,EcountMa,SIcountMa]
plt.bar(y_pos5, BoroughTrips05, align='center', alpha=0.5)
plt.xticks(y_pos5, BoroughNames05)
plt.ylabel('Average Trips')
plt.title('NYC Boroughs May Average Trips')
plt.show()
In [10]:
YellowCapsJune = YellowCapsJune.rename(columns = {'DOLocationID' : 'LocationID'})
RQ06 = pd.merge(YellowCapsJune, TaxiZoneLookUp, on="LocationID")
RQ06 = RQ06.Borough
McountJu = RQ06.value_counts()['Manhattan']
QcountJu = RQ06.value_counts()['Queens']
BcountJu = RQ06.value_counts()['Brooklyn']
UcountJu = RQ06.value_counts()['Unknown']
B1countJu = RQ06.value_counts()['Bronx']
EcountJu = RQ06.value_counts()['EWR']
SIcountJu = RQ06.value_counts()['Staten Island']
BoroughNames06 = ['Manhattan', 'Queens', 'Brooklyn', 'Unknown', 'Bronx', 'EWR', 'Staten Island']
y_pos6 = np.arange(len(BoroughNames06))
BoroughTrips06 = [McountJu,QcountJu,BcountJu,UcountJu,B1countJu,EcountJu,SIcountJu]
plt.bar(y_pos6, BoroughTrips06, align='center', alpha=0.5)
plt.xticks(y_pos6, BoroughNames06)
plt.ylabel('Average Trips')
plt.title('NYC Boroughs June Average Trips')
plt.show()
In [11]:
RQ4
print("Payment Types: Credit card = 1, Cash = 2, No charge = 3, 4Dispute = 4, Unknown = 5, Voided trip = 6")
Payment Types: Credit card = 1, Cash = 2, No charge = 3, 4Dispute = 4, Unknown = 5, Voided trip = 6
In [12]:
YellowCapsJanuary = YellowCapsJanuary.rename(columns = {'DOLocationID' : 'LocationID'})
YellowCapsJanuary = pd.merge(YellowCapsJanuary, TaxiZoneLookUp, on="LocationID")
RQBr = YellowCapsJanuary[YellowCapsJanuary['Borough'].isin(['Manhattan','Bronx','Brooklyn','EWR', 'Queens','Staten Island'])]
fig, ax = plt.subplots(figsize=(15,7))
RQBrPy = RQBr.groupby(['payment_type','Borough'])['payment_type'].count().unstack().plot(ax=ax)
plt.show()
In [13]:
Chi_Squared_test = chisquare(pd.crosstab(YellowCapsJanuary['Borough'],YellowCapsJanuary['payment_type']))
Chi_Squared_test 
Out[13]:
Power_divergenceResult(statistic=array([4351224.74058198, 2240211.32741206,   20535.07020814,
          6297.07873211]), pvalue=array([0., 0., 0., 0.]))
In [54]:
LoId = YellowCapsFeburary.LocationID
LoId = LoId.value_counts()
LoId.to_dict()
LoId
LoId.columns = ['Zone','Name']
In [67]:
NYC_Map = folium.Map(location=[40.6974881, -73.979681], zoom_start = 10, tiles="CartoDB dark_matter")
url = 'https://raw.githubusercontent.com/CriMenghini/ADM-2018/master/Homework_2/taxi_zones.json'
response = requests.get(url)
NYC_Map.choropleth(
    response.text,
    data = LoId,
    columns = ['Zone','Name'],
    key_on = 'feature.properties.LocationID',
    fill_color='YlOrRd', fill_opacity=0.7, line_opacity=0.2
)
folium.LayerControl().add_to(NYC_Map)
NYC_Map
Out[67]:
In [ ]: